%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Key R installations, for harmonic mean p-value package
import rpy2
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector, FloatVector
# Significance Threshold
ALPHA = 0.05
# uncomment for installing the first time
utils = rpackages.importr('utils')
utils.install_packages(StrVector(["harmonicmeanp"]))
hmp = rpackages.importr('harmonicmeanp')
harmonic_mean_p = robjects.r['p.hmp']
qharmonicmeanp = robjects.r['qharmonicmeanp']
--- Please select a CRAN mirror for use in this session ---
R[write to console]: trying URL 'https://mirror.las.iastate.edu/CRAN/bin/macosx/contrib/4.1/harmonicmeanp_3.0.tgz' R[write to console]: Content type 'application/x-gzip' R[write to console]: length 565587 bytes (552 KB) R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: = R[write to console]: R[write to console]: downloaded 552 KB
The downloaded binary packages are in /var/folders/t8/_4x3pyf564g6ngsshrkp589m0000gn/T//Rtmp0dw9Cf/downloaded_packages
df = pd.read_csv("MM_Fluency_Data.csv", index_col=0)
df.head()
| MM_01 | MM_02 | MM_05 | MM_08 | MM_10 | MM_11 | MM_13 | MM_14 | MM_15 | MM_16 | MM_17 | MM_18 | MM_19 | MM_20 | MM_23 | MM_24 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Modes | ||||||||||||||||
| Baseline | 7.75 | 3.12 | 9.81 | 2.27 | 2.12 | 1.91 | 6.29 | 5.70 | 3.96 | 3.21 | 6.13 | 2.53 | 3.34 | 7.28 | 3.07 | 7.43 |
| NF_final | 3.55 | 2.01 | 4.58 | 1.00 | 0.50 | 1.74 | 5.06 | 3.36 | 3.55 | 3.02 | 3.62 | 2.43 | 2.99 | 5.90 | 1.38 | 3.70 |
| Raw voice | 5.56 | 2.78 | 9.16 | 1.72 | 0.47 | 0.79 | 3.20 | 2.53 | 1.23 | 2.48 | 4.80 | 2.71 | 1.82 | 6.09 | 3.32 | 6.25 |
| Delay | 4.28 | 1.39 | 11.66 | 1.96 | 1.83 | 0.32 | 0.98 | 2.45 | 2.93 | 1.45 | 3.96 | 2.11 | 1.72 | 8.33 | 1.47 | 3.76 |
| Pitch Shift | 3.95 | 1.25 | 13.58 | 1.13 | 1.83 | 1.49 | 1.20 | 4.15 | 1.49 | 2.93 | 4.00 | 2.44 | 0.35 | 7.85 | 1.94 | 2.19 |
plt.figure(figsize=(15, 6))
sns.set_context('talk')
ax = sns.boxplot(data=df.T, width=0.35)
ax.set_xticklabels(ax.get_xticklabels(),rotation=90)
plt.ylabel("% dis/syl (lower is better)");
dfp = 100 * (df.loc["Baseline"] - df)/df.loc["Baseline"]
dfp.drop("Baseline", inplace=True)
plt.figure(figsize=(15, 6))
sns.set_context('talk')
ax = sns.boxplot(data=dfp.T, width=0.5)
ax.set_xticklabels(ax.get_xticklabels(),rotation=30)
ax = sns.swarmplot(data=dfp.T, color="0.25", size=10)
ax.set_xticklabels(ax.get_xticklabels(),rotation=30)
# sns.lineplot(data=dfp, legend=False)
plt.ylabel("% Improvement in dis/syl");
import plotly.express as px
fig = px.parallel_coordinates(df.T,
color_continuous_scale=px.colors.diverging.Tealrose,
color_continuous_midpoint=2)
fig.show()
import plotly.express as px
fig = px.parallel_coordinates(dfp.T,
color_continuous_scale=px.colors.diverging.Tealrose,
color_continuous_midpoint=2)
fig.show()
from scipy.stats import wilcoxon
import numpy as np
CONTROLS = ['Raw voice', 'Delay', 'Pitch Shift']
NON_FEEDBACK_MODES = ['Baseline', 'NF_final']
no_nf_modes = df.index.drop(NON_FEEDBACK_MODES)
pvalues = np.nan * np.ones((1, len(no_nf_modes)))
pvalues = pvalues.astype(np.float64)
for idx, mode in enumerate(no_nf_modes):
p = wilcoxon(df.loc['Baseline'], df.loc[mode], alternative="greater")
pvalues[0, idx] = p.pvalue
pvaluedf_baseline = pd.DataFrame(pvalues, columns=no_nf_modes, index=["Baseline"])
pvaluedf_baseline
| Modes | Raw voice | Delay | Pitch Shift | Bubbles | Pop | Retune | DJ | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Baseline | 0.000076 | 0.002579 | 0.003815 | 0.000015 | 0.000015 | 0.000015 | 0.000046 | 0.006546 | 0.000015 | 0.000015 | 0.000015 |
def calc_hmp(df, L=None, alpha=ALPHA):
p_value_arr = [val for val in df.values.ravel() if not np.isnan(val)]
L = L or len(p_value_arr)
w = FloatVector([1/L] * len(p_value_arr))
hmp = harmonic_mean_p(FloatVector(p_value_arr), w, L=L)[0]
hmp = hmp/np.sum(w)
if hmp < alpha:
adj_p = [harmonic_mean_p(FloatVector([val]), 1/L, L=L)[0]/(1/L) for val in p_value_arr]
adj_df = df.copy()
adj_p_idx = [idx for idx,val in enumerate(df.values.ravel()) if not np.isnan(val)]
adj_df.values.ravel()[adj_p_idx] = adj_p
else:
adj_df = None
return hmp, adj_df
def style_df(pvaluedf, hmp, alpha=ALPHA):
s = pvaluedf.style
s.set_table_styles([ # create internal CSS classes
{'selector': '.True', 'props': 'background-color: #e6ffe6;'},
{'selector': '.False', 'props': 'background-color: #ffe6e6;'},
{'selector': '.NaN', 'props': 'background-color: #000000;'},
], overwrite=False)
cell_color = pd.DataFrame(((pvaluedf < ALPHA) & (hmp < ALPHA)).values.astype(str),
index=pvaluedf.index,
columns=pvaluedf.columns)
cell_color[np.isnan(pvaluedf)] = "NaN"
s.set_td_classes(cell_color)
return s
L = pvaluedf_baseline.shape[1]
hmp, adj_p = calc_hmp(pvaluedf_baseline, L)
print(hmp)
style_df(adj_p, hmp)
2.565127582549799e-05
| Modes | Raw voice | Delay | Pitch Shift | Bubbles | Pop | Retune | DJ | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Baseline | 0.000846 | 0.033836 | 0.053651 | 0.000168 | 0.000168 | 0.000168 | 0.000506 | 0.105463 | 0.000168 | 0.000168 | 0.000168 |
hmp, adj_p = calc_hmp(pvaluedf_baseline[['Delay', 'Pitch Shift']], L)
print(hmp)
style_df(adj_p, hmp)
0.018950362607662957
| Modes | Delay | Pitch Shift |
|---|---|---|
| Baseline | 0.033836 | 0.053651 |
hmp, adj_p = calc_hmp(pvaluedf_baseline[['Bubbles', 'Pop', 'Retune', 'DJ', 'Piano', 'Harmony', 'Reverb', 'Whisper']], L)
print(hmp)
style_df(adj_p, hmp)
2.6501439002706487e-05
| Modes | Bubbles | Pop | Retune | DJ | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|
| Baseline | 0.000168 | 0.000168 | 0.000168 | 0.000506 | 0.105463 | 0.000168 | 0.000168 | 0.000168 |
no_controls_or_non_feedback_modes = df.index.drop(NON_FEEDBACK_MODES + CONTROLS)
pvalues = np.nan * np.ones((len(CONTROLS), len(no_controls_or_non_feedback_modes)))
pvalues = pvalues.astype(np.float64)
for idx1, mode1 in enumerate(CONTROLS):
for idx2, mode2 in enumerate(no_controls_or_non_feedback_modes):
if mode1 == mode2:
continue
p21 = wilcoxon(df.loc[mode1], df.loc[mode2], alternative="greater")
pvalues[idx1, idx2] = p21.pvalue
pvaluedf = pd.DataFrame(pvalues, columns=no_controls_or_non_feedback_modes, index=CONTROLS)
/usr/local/lib/python3.7/dist-packages/scipy/stats/morestats.py:3141: UserWarning: Exact p-value calculation does not work if there are ties. Switching to normal approximation.
hmp, adj_p = calc_hmp(pvaluedf)
print(hmp)
style_df(adj_p, hmp)
0.0005507495425260468
| Modes | Bubbles | Pop | Retune | DJ | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|
| Raw voice | 0.016769 | 0.022156 | 0.009125 | 0.004593 | 0.912323 | 0.000153 | 0.000214 | 0.000046 |
| Delay | 0.298294 | 0.004593 | 0.010696 | 0.064865 | 0.965428 | 0.000839 | 0.000839 | 0.000504 |
| Pitch Shift | 0.247711 | 0.028839 | 0.005493 | 0.079529 | 0.922182 | 0.001343 | 0.000839 | 0.000290 |
hmp_vals = []
adj_ps = []
for mode in pvaluedf.columns:
hmp, adj_p = calc_hmp(pvaluedf[[mode]].T, L=int(np.prod(pvaluedf.shape)-pvaluedf.shape[0]))
if adj_p is not None:
hmp_vals.append(hmp)
adj_ps.append(adj_p)
style_df(pd.concat(adj_ps).T, (np.array(hmp_vals)[:,None]*np.ones((1, len(adj_ps[0])))).T)
| Modes | Harmony | Reverb | Whisper |
|---|---|---|---|
| Raw voice | 0.003297 | 0.004662 | 0.000971 |
| Delay | 0.020065 | 0.020065 | 0.011483 |
| Pitch Shift | 0.034325 | 0.020065 | 0.006403 |
no_nf_modes = df.index.drop(NON_FEEDBACK_MODES)
pvalues = np.nan * np.ones((len(no_nf_modes), len(no_nf_modes)))
pvalues = pvalues.astype(np.float64)
for idx1, mode1 in enumerate(no_nf_modes):
for idx2, mode2 in enumerate(no_nf_modes):
if mode1 == mode2:
continue
p21 = wilcoxon(df.loc[mode1], df.loc[mode2], alternative="greater")
pvalues[idx1, idx2] = p21.pvalue
pvaluedf = pd.DataFrame(pvalues, columns=no_nf_modes, index=no_nf_modes)
/usr/local/lib/python3.7/dist-packages/scipy/stats/morestats.py:3141: UserWarning: Exact p-value calculation does not work if there are ties. Switching to normal approximation.
hmp, adj_p = calc_hmp(pvaluedf)
print(hmp)
style_df(adj_p, hmp)
0.0006454588599518488
| Modes | Raw voice | Delay | Pitch Shift | Bubbles | Pop | Retune | DJ | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Modes | |||||||||||
| Raw voice | nan | 0.231873 | 0.298294 | 0.016769 | 0.022156 | 0.009125 | 0.004593 | 0.912323 | 0.000153 | 0.000214 | 0.000046 |
| Delay | 0.783401 | nan | 0.600883 | 0.298294 | 0.004593 | 0.010696 | 0.064865 | 0.965428 | 0.000839 | 0.000839 | 0.000504 |
| Pitch Shift | 0.719055 | 0.399117 | nan | 0.247711 | 0.028839 | 0.005493 | 0.079529 | 0.922182 | 0.001343 | 0.000839 | 0.000290 |
| Bubbles | 0.985504 | 0.735916 | 0.768127 | nan | 0.116490 | 0.052292 | 0.069838 | 0.990875 | 0.005493 | 0.003815 | 0.001343 |
| Pop | 0.983231 | 0.996185 | 0.974670 | 0.883510 | nan | 0.247711 | 0.430130 | 0.999496 | 0.079529 | 0.079529 | 0.046722 |
| Retune | 0.992249 | 0.990875 | 0.995407 | 0.953278 | 0.768127 | nan | 0.454780 | 0.999786 | 0.087677 | 0.046722 | 0.009125 |
| DJ | 0.996185 | 0.941666 | 0.928070 | 0.930162 | 0.589554 | 0.545220 | nan | 0.999161 | 0.044201 | 0.007751 | 0.019318 |
| Piano | 0.096405 | 0.034572 | 0.077818 | 0.010696 | 0.000656 | 0.000290 | 0.001068 | nan | 0.000015 | 0.000046 | 0.000031 |
| Harmony | 0.999893 | 0.999344 | 0.998932 | 0.995407 | 0.935135 | 0.920471 | 0.955799 | 1.000000 | nan | 0.449966 | 0.530060 |
| Reverb | 0.999847 | 0.999496 | 0.999344 | 0.996857 | 0.928070 | 0.958374 | 0.993454 | 0.999969 | 0.569870 | nan | 0.719055 |
| Whisper | 0.999969 | 0.999619 | 0.999786 | 0.998932 | 0.958374 | 0.992249 | 0.983231 | 0.999985 | 0.489975 | 0.298294 | nan |
hmp_vals = []
adj_ps = []
for mode in pvaluedf.columns:
hmp, adj_p = calc_hmp(pvaluedf[[mode]].T, L=int(np.prod(pvaluedf.shape)-pvaluedf.shape[0]))
if adj_p is not None:
hmp_vals.append(hmp)
adj_ps.append(adj_p)
style_df(pd.concat(adj_ps).T, (np.array(hmp_vals)[:,None]*np.ones((1, len(adj_ps[0])))).T)
| Modes | Retune | Harmony | Reverb | Whisper |
|---|---|---|---|---|
| Modes | ||||
| Raw voice | 1.000000 | 0.019609 | 0.029077 | 0.005300 |
| Delay | 1.000000 | 0.201856 | 0.201856 | 0.089677 |
| Pitch Shift | 0.999999 | 0.463835 | 0.201856 | 0.042372 |
| Bubbles | 1.000000 | 0.999999 | 0.999477 | 0.463835 |
| Pop | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| Retune | nan | 1.000000 | 1.000000 | 1.000000 |
| DJ | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| Piano | 0.042372 | 0.001710 | 0.005300 | 0.003478 |
| Harmony | 1.000000 | nan | 1.000000 | 1.000000 |
| Reverb | 1.000000 | 1.000000 | nan | 1.000000 |
| Whisper | 1.000000 | 1.000000 | 1.000000 | nan |
no_nf_modes = df.index.drop(NON_FEEDBACK_MODES)
pvalues = np.nan * np.ones((len(no_nf_modes), len(no_nf_modes)))
pvalues = pvalues.astype(np.float64)
for idx1, mode1 in enumerate(no_nf_modes):
for idx2, mode2 in enumerate(no_nf_modes):
if idx1 >= idx2:
continue
p21 = wilcoxon(df.loc[mode1], df.loc[mode2], alternative="two-sided")
pvalues[idx1, idx2] = p21.pvalue
pvaluedf = pd.DataFrame(pvalues, columns=no_nf_modes, index=no_nf_modes)
/usr/local/lib/python3.7/dist-packages/scipy/stats/morestats.py:3141: UserWarning: Exact p-value calculation does not work if there are ties. Switching to normal approximation.
hmp, adj_p = calc_hmp(pvaluedf)
print(hmp)
style_df(adj_p, hmp)
0.0006454048535770217
| Modes | Raw voice | Delay | Pitch Shift | Bubbles | Pop | Retune | DJ | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Modes | |||||||||||
| Raw voice | nan | 0.463745 | 0.596588 | 0.033539 | 0.044312 | 0.018250 | 0.009186 | 0.192810 | 0.000305 | 0.000427 | 0.000092 |
| Delay | nan | nan | 0.798233 | 0.596588 | 0.009186 | 0.021393 | 0.129730 | 0.069144 | 0.001678 | 0.001678 | 0.001007 |
| Pitch Shift | nan | nan | nan | 0.495422 | 0.057678 | 0.010986 | 0.159058 | 0.155635 | 0.002686 | 0.001678 | 0.000580 |
| Bubbles | nan | nan | nan | nan | 0.232979 | 0.104584 | 0.139676 | 0.021393 | 0.010986 | 0.007629 | 0.002686 |
| Pop | nan | nan | nan | nan | nan | 0.495422 | 0.860260 | 0.001312 | 0.159058 | 0.159058 | 0.093445 |
| Retune | nan | nan | nan | nan | nan | nan | 0.909561 | 0.000580 | 0.175354 | 0.093445 | 0.018250 |
| DJ | nan | nan | nan | nan | nan | nan | nan | 0.002136 | 0.088402 | 0.015503 | 0.038635 |
| Piano | nan | nan | nan | nan | nan | nan | nan | nan | 0.000031 | 0.000092 | 0.000061 |
| Harmony | nan | nan | nan | nan | nan | nan | nan | nan | nan | 0.899933 | 0.979950 |
| Reverb | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | 0.596588 |
| Whisper | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
no_nf_modes = df.index.drop(NON_FEEDBACK_MODES)
pvalues = np.nan * np.ones((len(no_nf_modes), len(no_nf_modes)))
pvalues = pvalues.astype(np.float64)
for idx1, mode1 in enumerate(no_nf_modes):
for idx2, mode2 in enumerate(no_nf_modes):
if mode1 == mode2:
continue
p21 = wilcoxon(df.loc[mode1], df.loc[mode2], alternative="two-sided")
pvalues[idx1, idx2] = p21.pvalue
pvaluedf = pd.DataFrame(pvalues, columns=no_nf_modes, index=no_nf_modes)
/usr/local/lib/python3.7/dist-packages/scipy/stats/morestats.py:3141: UserWarning: Exact p-value calculation does not work if there are ties. Switching to normal approximation.
hmp_vals = []
adj_ps = []
for mode in pvaluedf.columns:
hmp, adj_p = calc_hmp(pvaluedf[[mode]].T, L=int(np.prod(pvaluedf.shape)-pvaluedf.shape[0])/2)
if adj_p is not None:
hmp_vals.append(hmp)
adj_ps.append(adj_p)
The green cells are then adjusted p-values less than alpha
style_df(pd.concat(adj_ps).T, (np.array(hmp_vals)[:,None]*np.ones((1, len(adj_ps[0])))).T)
| Modes | Raw voice | Delay | Pitch Shift | Retune | Piano | Harmony | Reverb | Whisper |
|---|---|---|---|---|---|---|---|---|
| Modes | ||||||||
| Raw voice | nan | 1.000000 | 1.000000 | 0.999999 | 1.000000 | 0.019352 | 0.028520 | 0.005281 |
| Delay | 1.000000 | nan | 1.000000 | 1.000000 | 1.000000 | 0.181632 | 0.181632 | 0.084893 |
| Pitch Shift | 1.000000 | 1.000000 | nan | 0.999579 | 1.000000 | 0.390806 | 0.181632 | 0.041215 |
| Bubbles | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.999579 | 0.987866 | 0.390806 |
| Pop | 1.000000 | 0.997515 | 1.000000 | 1.000000 | 0.124340 | 1.000000 | 1.000000 | 1.000000 |
| Retune | 0.999999 | 1.000000 | 0.999579 | nan | 0.041215 | 1.000000 | 1.000000 | 0.999999 |
| DJ | 0.997515 | 1.000000 | 1.000000 | 1.000000 | 0.268466 | 1.000000 | 0.999991 | 1.000000 |
| Piano | 1.000000 | 1.000000 | 1.000000 | 0.041215 | nan | 0.001708 | 0.005281 | 0.003469 |
| Harmony | 0.019352 | 0.181632 | 0.390806 | 1.000000 | 0.001708 | nan | 1.000000 | 1.000000 |
| Reverb | 0.028520 | 0.124340 | 0.181632 | 1.000000 | 0.005281 | 1.000000 | nan | 1.000000 |
| Whisper | 0.005281 | 0.084893 | 0.041215 | 0.999999 | 0.003469 | 1.000000 | 1.000000 | nan |